Packages
#Load all packages
library(plotrix)
library(arulesViz)
#devtools::install_github("rsquaredacademy/olsrr")
library(reshape2)
library(ggcorrplot)
library(spatialreg)
library(rgdal)
library(spgwr)
library(car)
library(caret)
library(spdep)
library(tidyr)
library(olsrr)
library(car)
library(ggplot2)
library(corrplot)
library(janitor)
library(maptools)
library(RColorBrewer)
library(classInt)
library(OpenStreetMap)
library(sp)
library(rgeos)
library(tmap)
library(tmaptools)
library(sf)
library(rgdal)
library(geojsonio)
library(openxlsx)
library(plotly)
library(tidyverse)
library(dplyr)
library(assertive)
library(plyr)
library(naniar)
Read in national child measurement data and convert to a data frame.
######Read in national child measurement data and convert to a data frame.
obesitydata<-read.csv("data/yr6_childmeasurement201819.csv")
obesitydata<-data.frame(obesitydata)
childdata<-obesitydata[-grep("^E1",obesitydata[,2]),]
childdata<- childdata %>% dplyr::filter(!(ONS.Code==""))
Rename LA variable and more data cleasning.
#Rename LA variable
names(childdata)[1] <- c("LA")
childdata<- childdata %>% dplyr::filter(!(LA=="ENGLAND1"))
class(childdata)
## [1] "data.frame"
#check dataset
names(childdata)
## [1] "LA" "ONS.Code"
## [3] "Underweight_number" "Underweight_percent"
## [5] "Lower.CI" "Upper..CI"
## [7] "Healthyweight_number" "Healthyweight_percent"
## [9] "Lower.CI.1" "Upper..CI.1"
## [11] "Overweight_number" "Overweight_percent"
## [13] "Lower.CI.2" "Upper..CI.2"
## [15] "Obese_number" "Obese_percent"
## [17] "Lower.CI.3" "Upper..CI.3"
## [19] "Severely.obese_number" "Severely.obese_percent"
## [21] "Lower.CI.4" "Upper..CI.4"
## [23] "overandobese_number" "overandobese_percent"
## [25] "Lower.CI.5" "Upper..CI.5"
## [27] "Total.number.of.children"
summary(childdata)
## LA ONS.Code Underweight_number Underweight_percent
## Adur : 1 E06000001: 1 12 : 26 x : 22
## Allerdale : 1 E06000002: 1 x : 22 1.009251472: 2
## Amber Valley: 1 E06000003: 1 20 : 15 1.652892562: 2
## Arun : 1 E06000004: 1 11 : 14 0.468854655: 1
## Ashfield : 1 E06000005: 1 16 : 14 0.516647532: 1
## Ashford : 1 E06000006: 1 9 : 14 0.606673407: 1
## (Other) :318 (Other) :318 (Other):219 (Other) :295
## Lower.CI Upper..CI Healthyweight_number Healthyweight_percent
## x : 22 x : 22 x : 22 x : 22
## 0.578266703: 2 1.755778497: 2 565 : 3 53.64311246: 1
## 0.227296963: 1 0.964643488: 1 633 : 3 54.44978954: 1
## 0.27204815 : 1 0.979008108: 1 661 : 3 54.85436893: 1
## 0.278331702: 1 1.103631632: 1 1036 : 2 55.19930676: 1
## 0.293794974: 1 1.134294839: 1 1063 : 2 55.29437136: 1
## (Other) :296 (Other) :296 (Other):289 (Other) :297
## Lower.CI.1 Upper..CI.1 Overweight_number Overweight_percent
## x : 22 x : 22 Min. : 34.0 Min. : 8.744
## 52.0194643 : 1 55.2590729 : 1 1st Qu.: 139.0 1st Qu.:13.144
## 52.3176109 : 1 56.13619347: 1 Median : 186.5 Median :13.972
## 52.75311863: 1 56.26684873: 1 Mean : 260.4 Mean :13.876
## 53.43402389: 1 56.72044107: 1 3rd Qu.: 325.0 3rd Qu.:14.673
## 53.75488029: 1 57.16131779: 1 Max. :2268.0 Max. :18.421
## (Other) :297 (Other) :297
## Lower.CI.2 Upper..CI.2 Obese_number Obese_percent
## Min. : 7.143 Min. :10.66 Min. : 37.0 Min. : 9.528
## 1st Qu.:11.327 1st Qu.:15.04 1st Qu.: 171.5 1st Qu.:15.622
## Median :12.277 Median :15.72 Median : 237.0 Median :18.866
## Mean :12.169 Mean :15.80 Mean : 373.9 Mean :18.846
## 3rd Qu.:13.150 3rd Qu.:16.64 3rd Qu.: 470.0 3rd Qu.:21.661
## Max. :15.541 Max. :21.70 Max. :3980.0 Max. :29.557
##
## Lower.CI.3 Upper..CI.3 Severely.obese_number Severely.obese_percent
## Min. : 7.946 Min. :11.39 35.00 : 9 2.93 : 4
## 1st Qu.:13.542 1st Qu.:17.82 20.00 : 8 1.50 : 3
## Median :16.755 Median :21.00 34.00 : 8 2.22 : 3
## Mean :16.921 Mean :20.96 23.00 : 7 2.57 : 3
## 3rd Qu.:19.994 3rd Qu.:23.82 60.00 : 7 2.97 : 3
## Max. :28.097 Max. :31.06 17.00 : 6 3.09 : 3
## (Other):279 (Other):305
## Lower.CI.4 Upper..CI.4 overandobese_number overandobese_percent
## 2.13 : 4 4.44 : 5 Min. : 71.0 Min. :19.06
## 2.21 : 4 5.73 : 4 1st Qu.: 319.5 1st Qu.:29.48
## 0.83 : 3 3.28 : 3 Median : 419.5 Median :32.84
## 1.28 : 3 4.04 : 3 Mean : 634.2 Mean :32.72
## 1.29 : 3 5.24 : 3 3rd Qu.: 807.2 3rd Qu.:36.15
## 1.92 : 3 5.45 : 3 Max. :6248.0 Max. :44.93
## (Other):304 (Other):303
## Lower.CI.5 Upper..CI.5 Total.number.of.children
## Min. :16.87 Min. :21.46 Min. : 226
## 1st Qu.:26.88 1st Qu.:32.15 1st Qu.: 1001
## Median :30.32 Median :35.59 Median : 1343
## Mean :30.33 Mean :35.22 Mean : 1850
## 3rd Qu.:34.06 3rd Qu.:38.78 3rd Qu.: 2333
## Max. :43.32 Max. :46.64 Max. :15478
##
#Rename ons code variable
names(childdata)[2] <- c("2011GSS_CODE")
#Keep obesity related variables to reduce size of file.
childdata<- childdata[,c( "LA","2011GSS_CODE","Obese_percent","overandobese_percent", "Severely.obese_percent", "Total.number.of.children")]
#Change variables into numeric variables
childdata$Obese_percent_num <- as.numeric(childdata$"Obese_percent")
str(childdata)
## 'data.frame': 324 obs. of 7 variables:
## $ LA : Factor w/ 362 levels "","Adur","Allerdale",..: 69 76 116 136 187 193 206 213 231 276 ...
## $ 2011GSS_CODE : Factor w/ 362 levels "","E06000001",..: 48 6 293 2 3 277 278 56 4 279 ...
## $ Obese_percent : num 22.4 22.5 24.2 26.9 24.7 ...
## $ overandobese_percent : num 37.7 37.6 37.8 43.8 39.7 ...
## $ Severely.obese_percent : Factor w/ 248 levels "","1.16","1.18",..: 200 152 188 234 229 235 192 98 163 227 ...
## $ Total.number.of.children: num 5664 1198 1912 1154 1821 ...
## $ Obese_percent_num : num 22.4 22.5 24.2 26.9 24.7 ...
childdata$Severely.obese_percent<- revalue(childdata$"Severely.obese_percent", c("x"="0"))
childdata$Severely.obese_percentnum <-as.numeric(paste((childdata$"Severely.obese_percent")))
#Create obesity rate variable.
childdata$obesity_rate<- childdata$Obese_percent_num +childdata$Severely.obese_percentnum
childdata<- childdata[,c( "LA","2011GSS_CODE","Obese_percent","overandobese_percent", "obesity_rate", "Total.number.of.children")]
#NCMP data uses 2011 LA ecode, LA code changed for 2019, need to add in new code to allow IMD variables to be fully merged.
childdata$GSS_CODE<- (childdata$"2011GSS_CODE")
#New 2019 code: E06000058 - Bournemouth, Christchurch and Poole - new unitary authority instead of:
#Old code:E06000028 Bournemouth,E06000029 Poole,E07000048 Christchurch
childdata$GSS_CODE<- revalue(childdata$"GSS_CODE", c("E06000028"="E06000058","E06000029"="E06000058","E07000048"="E06000058"))
#New 2019 code:E06000059 - Dorset - new unitary authority - (Dorset county abolished)
#Old code:E07000049 East Dorset,E07000050 North Dorset,E07000051 Purbeck,E07000052 West Dorset,E07000053 Weymouth and Portland
childdata$GSS_CODE<- revalue(childdata$"GSS_CODE", c("E07000049"="E06000059","E07000050"="E06000059","E07000051"="E06000059",
"E07000052"="E06000059","E07000053"="E06000059"))
#New 2019 code:E07000244 - East Suffolk - new local authority district (Suffolk Coastal and Waveney districts abolished)
#Old code:E07000205 Suffolk Coastal,E07000206 Waveney
childdata$GSS_CODE<- revalue(childdata$"GSS_CODE", c("E07000205"="E07000244","E07000206"="E07000244"))
#New 2019 code:E07000245 - West Suffolk - new local authority district (Forest Heath and St Edmundsbury districts abolished)
#Old code:E07000201 Forest Heath,E07000204 St. Edmundsbury
childdata$GSS_CODE<- revalue(childdata$"GSS_CODE", c("E07000201"="E07000245","E07000204"="E07000245"))
#New 2019 code:E07000246 - Somerset West and Taunton - new local authority district (Taunton Deane and West Somerset districts abolished)
#Old code:E07000190 Taunton Deane,E07000191 West Somerset
childdata$GSS_CODE<- revalue(childdata$"GSS_CODE", c("E07000190"="E07000246","E07000191"="E07000246"))
#####Fast food shop density file
fastfood<-read.csv("data/fastfood_outlet.csv")
fastfood<-data.frame(fastfood)
is.na(fastfood) <- fastfood == "*"
fastfood$fast_food_density<-as.numeric(paste((fastfood$"Rate.per.100.000.population")))
#La code for this data is for 2018, change some of the codes to 2011 La code to allow full merge into main data.
#change E06000048 Northumberland to E06000057
#change E07000097 East Hertfordshire to E07000242
#change E07000100 St Albans to E07000240
#change E07000101 Stevenage to E07000243
#change E07000104 Welwyn Hatfield to E07000241
#change E08000020 Gateshead to E08000037
fastfood$LA.code<- revalue(fastfood$"LA.code", c("E06000048"="E06000057"))
fastfood$LA.code<- revalue(fastfood$"LA.code", c("E07000097"="E07000242"))
fastfood$LA.code<- revalue(fastfood$"LA.code", c("E07000100"="E07000240"))
fastfood$LA.code<- revalue(fastfood$"LA.code", c("E07000101"="E07000243"))
fastfood$LA.code<- revalue(fastfood$"LA.code", c("E07000104"="E07000241"))
fastfood$LA.code<- revalue(fastfood$"LA.code", c("E08000020"="E08000037"))
names(fastfood)[2] <- c("2011GSS_CODE")
#Merge into main data.
childdata <- merge(x = childdata, y = fastfood, by = "2011GSS_CODE", all.x = TRUE)
#####Ethnicity data uses the 2011 ecode as well, merge in ethnicity data first.
ethnicity<-read.csv("data/census_ethnicity.csv")
ethnicity<-data.frame(ethnicity)
names(ethnicity)[3] <- c("2011GSS_CODE")
#Only keep variables of interest to reduce size of data.
ethnicity_por<- ethnicity[,c("2011GSS_CODE","Sex..All.persons..Age..Age.0.to.24..Ethnic.Group..All.categories..Ethnic.group..measures..Value",
"Sex..All.persons..Age..Age.0.to.24..Ethnic.Group..White..Total..measures..Value",
"Sex..All.persons..Age..Age.0.to.24..Ethnic.Group..Mixed.multiple.ethnic.group..Total..measures..Value",
"Sex..All.persons..Age..Age.0.to.24..Ethnic.Group..Asian.Asian.British..Total..measures..Value",
"Sex..All.persons..Age..Age.0.to.24..Ethnic.Group..Black.African.Caribbean.Black.British..Total..measures..Value",
"Sex..All.persons..Age..Age.0.to.24..Ethnic.Group..Other.ethnic.group..Total..measures..Value")]
#Merge with main dataset
childdata2 <- merge(x = childdata, y = ethnicity_por, by = "2011GSS_CODE", all.x = TRUE)
#Summarise the ethnicity and obesity data for the unitary local authorites that were combined in 2019.
childdata2 <-data.frame(aggregate(. ~ GSS_CODE,childdata2, sum))
#Create percentage variables for each ethnic group.
childdata2$percent_white<- childdata2[[ "Sex..All.persons..Age..Age.0.to.24..Ethnic.Group..White..Total..measures..Value"]]/childdata2[["Sex..All.persons..Age..Age.0.to.24..Ethnic.Group..All.categories..Ethnic.group..measures..Value"]]
childdata2$percent_mix<- childdata2[["Sex..All.persons..Age..Age.0.to.24..Ethnic.Group..Mixed.multiple.ethnic.group..Total..measures..Value"]]/childdata2[["Sex..All.persons..Age..Age.0.to.24..Ethnic.Group..All.categories..Ethnic.group..measures..Value"]]
childdata2$percent_asian<- childdata2[["Sex..All.persons..Age..Age.0.to.24..Ethnic.Group..Asian.Asian.British..Total..measures..Value"]]/childdata2[["Sex..All.persons..Age..Age.0.to.24..Ethnic.Group..All.categories..Ethnic.group..measures..Value"]]
childdata2$percent_african<- childdata2[[ "Sex..All.persons..Age..Age.0.to.24..Ethnic.Group..Black.African.Caribbean.Black.British..Total..measures..Value"]]/childdata2[["Sex..All.persons..Age..Age.0.to.24..Ethnic.Group..All.categories..Ethnic.group..measures..Value"]]
#New 2019 code: E06000058 - Bournemouth, Christchurch and Poole - new unitary authority instead of:
#Old code:E06000028 Bournemouth,E06000029 Poole,E07000048 Christchurch
childdata2$obesity_rate<- with(childdata2, ifelse(GSS_CODE=='E06000058',(childdata2$obesity_rate)/3,obesity_rate))
childdata2$fast_food_density<- with(childdata2, ifelse(GSS_CODE=='E06000058',(childdata2$fast_food_density)/3,fast_food_density))
#New 2019 code:E06000059 - Dorset - new unitary authority - (Dorset county abolished)
#Old code:E07000049 East Dorset,E07000050 North Dorset,E07000051 Purbeck,E07000052 West Dorset,E07000053 Weymouth and Portland
childdata2$obesity_rate<- with(childdata2, ifelse(GSS_CODE=='E06000059',(childdata2$obesity_rate)/5,obesity_rate))
childdata2$fast_food_density<- with(childdata2, ifelse(GSS_CODE=='E06000059',(childdata2$fast_food_density)/5,fast_food_density))
#New 2019 code:E07000244 - East Suffolk - new local authority district (Suffolk Coastal and Waveney districts abolished)
#Old code:E07000205 Suffolk Coastal,E07000206 Waveney
childdata2$obesity_rate<- with(childdata2, ifelse(GSS_CODE=='E07000244',(childdata2$obesity_rate)/2,obesity_rate))
childdata2$fast_food_density<- with(childdata2, ifelse(GSS_CODE=='E07000244',(childdata2$fast_food_density)/2,fast_food_density))
#New 2019 code:E07000245 - West Suffolk - new local authority district (Forest Heath and St Edmundsbury districts abolished)
#Old code:E07000201 Forest Heath,E07000204 St. Edmundsbury
childdata2$obesity_rate<- with(childdata2, ifelse(GSS_CODE=='E07000245',(childdata2$obesity_rate)/2,obesity_rate))
childdata2$fast_food_density<- with(childdata2, ifelse(GSS_CODE=='E07000245',(childdata2$fast_food_density)/2,fast_food_density))
#New 2019 code:E07000246 - Somerset West and Taunton - new local authority district (Taunton Deane and West Somerset districts abolished)
#Old code:E07000190 Taunton Deane,E07000191 West Somerset
childdata2$obesity_rate<- with(childdata2, ifelse(GSS_CODE=='E07000246',(childdata2$obesity_rate)/2,obesity_rate))
childdata2$fast_food_density<- with(childdata2, ifelse(GSS_CODE=='E07000246',(childdata2$fast_food_density)/2,fast_food_density))
######Read in multiple excel tabs for the imd data.
IMD_income <- read.xlsx('data/IMD2019.xlsx',sheet=3)
IMD_employ <- read.xlsx('data/IMD2019.xlsx',sheet=4)
IMD_edu <- read.xlsx('data/IMD2019.xlsx',sheet=5)
IMD_health <- read.xlsx('data/IMD2019.xlsx',sheet=6)
IMD_crime <- read.xlsx('data/IMD2019.xlsx',sheet=7)
IMD_barriers <- read.xlsx('data/IMD2019.xlsx',sheet=8)
IMD_living <- read.xlsx('data/IMD2019.xlsx',sheet=9)
IMD_idaci <- read.xlsx('data/IMD2019.xlsx',sheet=10)
#Put data into data frames.
IMD_income <- data.frame(IMD_income)
IMD_employ <- data.frame(IMD_employ)
IMD_edu <- data.frame(IMD_edu)
IMD_health <- data.frame(IMD_health)
IMD_crime <- data.frame(IMD_crime)
IMD_barriers <- data.frame(IMD_barriers)
IMD_living <- data.frame(IMD_living)
IMD_idaci <- data.frame(IMD_idaci)
#Rename ons code variable
names(childdata)[2] <- c("GSS_CODE")
names(IMD_income)[1] <- c("GSS_CODE")
names(IMD_employ)[1] <- c("GSS_CODE")
names(IMD_edu)[1] <- c("GSS_CODE")
names(IMD_health)[1] <- c("GSS_CODE")
names(IMD_crime)[1] <- c("GSS_CODE")
names(IMD_barriers)[1] <- c("GSS_CODE")
names(IMD_living)[1] <- c("GSS_CODE")
names(IMD_idaci)[1] <- c("GSS_CODE")
#Remove duplicate variables in these files.
IMD_income <-within(IMD_income, rm("Local.Authority.District.name..2019."))
IMD_employ <-within(IMD_employ , rm("Local.Authority.District.name..2019."))
IMD_edu <-within(IMD_edu, rm("Local.Authority.District.name..2019."))
IMD_health <-within(IMD_health, rm("Local.Authority.District.name..2019."))
IMD_crime <-within(IMD_crime, rm("Local.Authority.District.name..2019."))
IMD_barriers <-within(IMD_barriers, rm("Local.Authority.District.name..2019."))
IMD_living <-within(IMD_living, rm("Local.Authority.District.name..2019."))
IMD_idaci <-within(IMD_idaci, rm("Local.Authority.District.name..2019."))
#Merge the datasets
childdata2 <- merge(x = childdata2, y = IMD_income, by = "GSS_CODE", all.x = TRUE)
childdata2 <- merge(x = childdata2, y = IMD_employ, by = "GSS_CODE", all.x = TRUE)
childdata2 <- merge(x = childdata2, y = IMD_edu, by = "GSS_CODE", all.x = TRUE)
childdata2 <- merge(x = childdata2, y = IMD_health, by = "GSS_CODE", all.x = TRUE)
childdata2 <- merge(x = childdata2, y = IMD_crime, by = "GSS_CODE", all.x = TRUE)
childdata2 <- merge(x = childdata2, y = IMD_barriers, by = "GSS_CODE", all.x = TRUE)
childdata2 <- merge(x = childdata2, y = IMD_living, by = "GSS_CODE", all.x = TRUE)
childdata2 <- merge(x = childdata2, y = IMD_idaci, by = "GSS_CODE", all.x = TRUE)
#Physical activity data
activity <- read.xlsx('data/childactivity.xlsx',sheet=1)
activity<-data.frame(activity)
exercise<- activity[,c("La","GSS_CODE","X60minplus_Rate....","fairly_active_Rate....","Less_active_Rate....")]
is.na(exercise) <- exercise == "^"
is.na(exercise) <- exercise == "*"
exercise$active_rate <-as.numeric(paste((exercise$"X60minplus_Rate....")))
exercise$fairly_active_rate <-as.numeric(paste((exercise$"fairly_active_Rate....")))
exercise$less_active_rate <-as.numeric(paste((exercise$"Less_active_Rate....")))
exercise<- exercise[,c("La","GSS_CODE","active_rate", "fairly_active_rate","less_active_rate")]
#Merge with main dataset
childdata2 <- merge(x = childdata2, y =exercise, by = "GSS_CODE", all.x = TRUE)
#Keep only relavent IMD variables. We will use the score variables.
childdata2<- childdata2[,c("GSS_CODE","obesity_rate","fast_food_density","active_rate",
"percent_white",
"percent_mix",
"percent_asian",
"percent_african",
"Income...Average.rank",
"Employment...Average.rank",
"Education..Skills.and.Training...Average.rank",
"Health.Deprivation.and.Disability...Average.rank",
"Crime...Average.rank",
"Barriers.to.Housing.and.Services...Average.rank",
"Living.Environment...Average.rank",
"IDACI...Average.rank",
"Income...Average.score",
"Employment...Average.score",
"Education..Skills.and.Training...Average.score",
"Health.Deprivation.and.Disability...Average.score",
"Crime...Average.score",
"Barriers.to.Housing.and.Services...Average.score",
"Living.Environment...Average.score",
"IDACI...Average.score",
"fairly_active_rate","less_active_rate" )]
names(childdata2)
## [1] "GSS_CODE"
## [2] "obesity_rate"
## [3] "fast_food_density"
## [4] "active_rate"
## [5] "percent_white"
## [6] "percent_mix"
## [7] "percent_asian"
## [8] "percent_african"
## [9] "Income...Average.rank"
## [10] "Employment...Average.rank"
## [11] "Education..Skills.and.Training...Average.rank"
## [12] "Health.Deprivation.and.Disability...Average.rank"
## [13] "Crime...Average.rank"
## [14] "Barriers.to.Housing.and.Services...Average.rank"
## [15] "Living.Environment...Average.rank"
## [16] "IDACI...Average.rank"
## [17] "Income...Average.score"
## [18] "Employment...Average.score"
## [19] "Education..Skills.and.Training...Average.score"
## [20] "Health.Deprivation.and.Disability...Average.score"
## [21] "Crime...Average.score"
## [22] "Barriers.to.Housing.and.Services...Average.score"
## [23] "Living.Environment...Average.score"
## [24] "IDACI...Average.score"
## [25] "fairly_active_rate"
## [26] "less_active_rate"
#Rename variables with clearer names.
names(childdata2)[3] <- "Density of fast food outlets"
names(childdata2)[4] <- "Active rate"
names(childdata2)[5] <- "Percentage of White children"
names(childdata2)[6] <- "Percentage of Mixed children"
names(childdata2)[7] <- "Percentage of Asian children"
names(childdata2)[8] <- "Percentage of African children"
names(childdata2)[17] <- "IMD income average score"
names(childdata2)[18] <- "IMD employment average score"
names(childdata2)[19] <- "IMD education average score"
names(childdata2)[20] <- "IMD health average score"
names(childdata2)[21] <- "IMD crime average score"
names(childdata2)[22] <- "IMD housing average score"
names(childdata2)[23] <- "IMD living average score"
names(childdata2)[24] <- "IMD IDACI average score"
names(childdata2)[25] <- "Fairly active rate"
names(childdata2)[26] <- "Less active rate"
Check dependent variable is normal.Appendix Figure A1
#Check dependent variable is normal.
boxplot(childdata2$obesity_rate,main="Boxplot of obesity rate")
Check transformations of dependent variable and see which one will be normal. Appendix Figure A1
#Check transformations of dependent variable and see which one will be normal.
symbox(~`obesity_rate`, childdata2, na.rm=T, powers=seq(-3,3,by=.5),main="Types of transformation",ylab="Obesity rate", xlab="Power of transformation")
#Based on this, use square root instead.
childdata2$root_ob<- sqrt(childdata2$obesity_rate)
boxplot(childdata2$root_ob,main="Boxplot of transformed obesity rate")
Join main dataset and map data
Appendix Table 1: Univariate regression
#Variable selection.
#Univariate regression first.
model_1 <- lm(root_ob ~ `Density of fast food outlets`, data = modeldata)#significant
summary(model_1)
##
## Call:
## lm(formula = root_ob ~ `Density of fast food outlets`, data = modeldata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.27677 -0.29938 -0.00988 0.29868 1.45546
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.6948563 0.0842665 43.85 <2e-16 ***
## `Density of fast food outlets` 0.0127375 0.0009756 13.06 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4625 on 313 degrees of freedom
## Multiple R-squared: 0.3526, Adjusted R-squared: 0.3505
## F-statistic: 170.5 on 1 and 313 DF, p-value: < 2.2e-16
model_2 <- lm(root_ob ~ `Active rate`, data = modeldata)#significant
summary(model_2)
##
## Call:
## lm(formula = root_ob ~ `Active rate`, data = modeldata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.5186 -0.3957 -0.0229 0.4089 1.4139
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.8446 0.2481 23.558 < 2e-16 ***
## `Active rate` -2.3662 0.5207 -4.544 8.38e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5489 on 266 degrees of freedom
## (47 observations deleted due to missingness)
## Multiple R-squared: 0.07204, Adjusted R-squared: 0.06855
## F-statistic: 20.65 on 1 and 266 DF, p-value: 8.378e-06
model_3 <- lm(root_ob ~ `Percentage of White children`, data = modeldata)#significant
summary(model_3)
##
## Call:
## lm(formula = root_ob ~ `Percentage of White children`, data = modeldata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.33103 -0.35594 0.01714 0.34855 1.23013
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.1290 0.1569 39.073 <2e-16 ***
## `Percentage of White children` -1.6286 0.1809 -9.001 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5123 on 313 degrees of freedom
## Multiple R-squared: 0.2056, Adjusted R-squared: 0.2031
## F-statistic: 81.02 on 1 and 313 DF, p-value: < 2.2e-16
model_4 <- lm(root_ob ~ `Percentage of Mixed children`, data = modeldata)#significant
summary(model_4)
##
## Call:
## lm(formula = root_ob ~ `Percentage of Mixed children`, data = modeldata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.42548 -0.41214 0.01901 0.44044 1.19231
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.51037 0.05736 78.630 < 2e-16 ***
## `Percentage of Mixed children` 5.98789 1.24793 4.798 2.48e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5547 on 313 degrees of freedom
## Multiple R-squared: 0.06852, Adjusted R-squared: 0.06554
## F-statistic: 23.02 on 1 and 313 DF, p-value: 2.481e-06
model_5 <- lm(root_ob ~ `Percentage of Asian children`, data = modeldata)#significant
summary(model_5)
##
## Call:
## lm(formula = root_ob ~ `Percentage of Asian children`, data = modeldata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.35098 -0.39116 0.00483 0.37521 1.20279
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.56331 0.03763 121.253 < 2e-16 ***
## `Percentage of Asian children` 2.50436 0.32550 7.694 1.87e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5271 on 313 degrees of freedom
## Multiple R-squared: 0.159, Adjusted R-squared: 0.1564
## F-statistic: 59.2 on 1 and 313 DF, p-value: 1.874e-13
model_6 <- lm(root_ob ~ `Percentage of African children`, data = modeldata)#significant
summary(model_6)
##
## Call:
## lm(formula = root_ob ~ `Percentage of African children`, data = modeldata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.37075 -0.40101 0.00913 0.40737 1.21564
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.61152 0.03298 139.840 < 2e-16 ***
## `Percentage of African children` 4.38738 0.51796 8.471 9.66e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5184 on 313 degrees of freedom
## Multiple R-squared: 0.1865, Adjusted R-squared: 0.1839
## F-statistic: 71.75 on 1 and 313 DF, p-value: 9.661e-16
model_7 <- lm(root_ob ~ `IMD income average score`, data = modeldata)#significant
summary(model_7)
##
## Call:
## lm(formula = root_ob ~ `IMD income average score`, data = modeldata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.83874 -0.20839 0.01143 0.19565 0.90327
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.53386 0.05152 68.60 <2e-16 ***
## `IMD income average score` 10.38165 0.41299 25.14 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3308 on 313 degrees of freedom
## Multiple R-squared: 0.6688, Adjusted R-squared: 0.6677
## F-statistic: 631.9 on 1 and 313 DF, p-value: < 2.2e-16
model_8 <- lm(root_ob ~ `IMD employment average score`, data = modeldata)#significant
summary(model_8)
##
## Call:
## lm(formula = root_ob ~ `IMD employment average score`, data = modeldata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.95236 -0.26248 -0.01322 0.21784 1.14273
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.63327 0.06408 56.70 <2e-16 ***
## `IMD employment average score` 12.00710 0.65060 18.46 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3978 on 313 degrees of freedom
## Multiple R-squared: 0.5211, Adjusted R-squared: 0.5196
## F-statistic: 340.6 on 1 and 313 DF, p-value: < 2.2e-16
model_9 <- lm(root_ob ~ `IMD education average score`, data = modeldata)#significant
summary(model_9)
##
## Call:
## lm(formula = root_ob ~ `IMD education average score`, data = modeldata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.92965 -0.30765 -0.06097 0.22807 1.38491
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.866392 0.064383 60.05 <2e-16 ***
## `IMD education average score` 0.041999 0.002851 14.73 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4417 on 313 degrees of freedom
## Multiple R-squared: 0.4095, Adjusted R-squared: 0.4076
## F-statistic: 217 on 1 and 313 DF, p-value: < 2.2e-16
model_10 <- lm(root_ob ~ `IMD health average score`, data = modeldata)#significant
summary(model_10)
##
## Call:
## lm(formula = root_ob ~ `IMD health average score`, data = modeldata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.96863 -0.24078 -0.00531 0.19580 1.36286
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.82447 0.02311 208.74 <2e-16 ***
## `IMD health average score` 0.64498 0.03567 18.08 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.402 on 313 degrees of freedom
## Multiple R-squared: 0.5109, Adjusted R-squared: 0.5094
## F-statistic: 327 on 1 and 313 DF, p-value: < 2.2e-16
model_11 <- lm(root_ob ~ `IMD crime average score`, data = modeldata)#significant
summary(model_11)
##
## Call:
## lm(formula = root_ob ~ `IMD crime average score`, data = modeldata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.23527 -0.27003 -0.00924 0.28329 1.31520
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.83803 0.02583 187.33 <2e-16 ***
## `IMD crime average score` 0.71579 0.04895 14.62 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.443 on 313 degrees of freedom
## Multiple R-squared: 0.4059, Adjusted R-squared: 0.404
## F-statistic: 213.8 on 1 and 313 DF, p-value: < 2.2e-16
model_12 <- lm(root_ob ~ `IMD housing average score`, data = modeldata)#insignificant
summary(model_12)
##
## Call:
## lm(formula = root_ob ~ `IMD housing average score`, data = modeldata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.46163 -0.46095 0.01413 0.44805 1.42131
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.639841 0.118212 39.250 <2e-16 ***
## `IMD housing average score` 0.004669 0.005240 0.891 0.374
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5741 on 313 degrees of freedom
## Multiple R-squared: 0.002531, Adjusted R-squared: -0.0006562
## F-statistic: 0.7941 on 1 and 313 DF, p-value: 0.3735
model_13 <- lm(root_ob ~ `IMD living average score`, data = modeldata)#significant
summary(model_13)
##
## Call:
## lm(formula = root_ob ~ `IMD living average score`, data = modeldata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.31999 -0.40801 0.01659 0.39484 1.30631
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.317863 0.078347 55.112 < 2e-16 ***
## `IMD living average score` 0.020840 0.003548 5.874 1.09e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5455 on 313 degrees of freedom
## Multiple R-squared: 0.09928, Adjusted R-squared: 0.09641
## F-statistic: 34.5 on 1 and 313 DF, p-value: 1.088e-08
model_14 <- lm(root_ob ~ `IMD IDACI average score`, data = modeldata)#significant
summary(model_14)
##
## Call:
## lm(formula = root_ob ~ `IMD IDACI average score`, data = modeldata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.76574 -0.21304 -0.00681 0.20497 0.96490
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.52748 0.05158 68.38 <2e-16 ***
## `IMD IDACI average score` 7.91058 0.31361 25.22 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3301 on 313 degrees of freedom
## Multiple R-squared: 0.6703, Adjusted R-squared: 0.6692
## F-statistic: 636.3 on 1 and 313 DF, p-value: < 2.2e-16
model_15 <- lm(root_ob ~ `Fairly active rate`, data = modeldata)#insignificant
summary(model_15)
##
## Call:
## lm(formula = root_ob ~ `Fairly active rate`, data = modeldata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.4100 -0.4365 -0.0132 0.4096 1.4411
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.9832 0.2022 24.641 <2e-16 ***
## `Fairly active rate` -1.0781 0.8308 -1.298 0.196
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.567 on 265 degrees of freedom
## (48 observations deleted due to missingness)
## Multiple R-squared: 0.006315, Adjusted R-squared: 0.002565
## F-statistic: 1.684 on 1 and 265 DF, p-value: 0.1955
model_16 <- lm(root_ob ~ `Less active rate`, data = modeldata)#significant
summary(model_16)
##
## Call:
## lm(formula = root_ob ~ `Less active rate`, data = modeldata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.40039 -0.38736 -0.04242 0.38638 1.43481
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.7469 0.1612 23.24 < 2e-16 ***
## `Less active rate` 3.3987 0.5473 6.21 2.03e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5325 on 266 degrees of freedom
## (47 observations deleted due to missingness)
## Multiple R-squared: 0.1266, Adjusted R-squared: 0.1233
## F-statistic: 38.56 on 1 and 266 DF, p-value: 2.03e-09
Check correlation of model variables.
##Figure 2. Visualise the correlation matrix
#visualise the correlation matrix
ggcorrplot(corr, hc.order = TRUE, type = "lower",
outline.col = "white",tl.cex = 11,
colors = c("#6D9EC1", "white", "#E46726"))
Final model selection based on F test, adjusted R etc.
#Final model selection based on F test, adjusted R etc.
model_a <-lm(root_ob ~`Density of fast food outlets`+ `Percentage of Mixed children`+ `Percentage of Asian children`+ `Percentage of African children`+ `IMD education average score`+ `IMD health average score`+ `IMD crime average score`+`IMD living average score`+ `IMD IDACI average score`+`Less active rate`, data = modeldata)
summary(model_a)
##
## Call:
## lm(formula = root_ob ~ `Density of fast food outlets` + `Percentage of Mixed children` +
## `Percentage of Asian children` + `Percentage of African children` +
## `IMD education average score` + `IMD health average score` +
## `IMD crime average score` + `IMD living average score` +
## `IMD IDACI average score` + `Less active rate`, data = modeldata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.78546 -0.19218 0.00912 0.18340 0.74284
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.015791 0.190873 21.039 < 2e-16 ***
## `Density of fast food outlets` -0.001369 0.001109 -1.235 0.217986
## `Percentage of Mixed children` -4.626314 1.398842 -3.307 0.001077 **
## `Percentage of Asian children` 1.362550 0.265761 5.127 5.8e-07 ***
## `Percentage of African children` 5.057605 0.589319 8.582 9.0e-16 ***
## `IMD education average score` 0.016403 0.004176 3.928 0.000110 ***
## `IMD health average score` 0.253100 0.072771 3.478 0.000593 ***
## `IMD crime average score` -0.053869 0.057342 -0.939 0.348392
## `IMD living average score` -0.004143 0.002324 -1.783 0.075783 .
## `IMD IDACI average score` 2.928125 0.897886 3.261 0.001260 **
## `Less active rate` 0.306062 0.304036 1.007 0.315043
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2655 on 257 degrees of freedom
## (47 observations deleted due to missingness)
## Multiple R-squared: 0.7902, Adjusted R-squared: 0.782
## F-statistic: 96.8 on 10 and 257 DF, p-value: < 2.2e-16
#Remove less active rate
model_b <-lm(root_ob ~`Density of fast food outlets`+ `Percentage of Mixed children`+ `Percentage of Asian children`+ `Percentage of African children`+ `IMD education average score`+ `IMD health average score`+ `IMD crime average score`+`IMD living average score`+ `IMD IDACI average score`, data = modeldata)
summary(model_b)
##
## Call:
## lm(formula = root_ob ~ `Density of fast food outlets` + `Percentage of Mixed children` +
## `Percentage of Asian children` + `Percentage of African children` +
## `IMD education average score` + `IMD health average score` +
## `IMD crime average score` + `IMD living average score` +
## `IMD IDACI average score`, data = modeldata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.73966 -0.18434 -0.01111 0.18002 1.23216
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.866e+00 1.467e-01 26.345 < 2e-16 ***
## `Density of fast food outlets` -5.435e-05 9.435e-04 -0.058 0.954099
## `Percentage of Mixed children` -3.148e+00 1.348e+00 -2.336 0.020165 *
## `Percentage of Asian children` 1.171e+00 2.258e-01 5.188 3.89e-07 ***
## `Percentage of African children` 4.485e+00 5.398e-01 8.309 3.21e-15 ***
## `IMD education average score` 1.997e-02 3.791e-03 5.268 2.61e-07 ***
## `IMD health average score` 1.730e-01 6.652e-02 2.601 0.009743 **
## `IMD crime average score` -3.687e-02 5.348e-02 -0.689 0.491088
## `IMD living average score` -3.138e-03 2.183e-03 -1.438 0.151476
## `IMD IDACI average score` 2.936e+00 8.213e-01 3.575 0.000406 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2779 on 305 degrees of freedom
## Multiple R-squared: 0.7722, Adjusted R-squared: 0.7655
## F-statistic: 114.9 on 9 and 305 DF, p-value: < 2.2e-16
#Remove fast food shop density
model_c <-lm(root_ob ~ `Percentage of Mixed children`+ `Percentage of Asian children`+ `Percentage of African children`+ `IMD education average score`+ `IMD health average score`+ `IMD crime average score`+`IMD living average score`+ `IMD IDACI average score`, data = modeldata)
summary(model_c)
##
## Call:
## lm(formula = root_ob ~ `Percentage of Mixed children` + `Percentage of Asian children` +
## `Percentage of African children` + `IMD education average score` +
## `IMD health average score` + `IMD crime average score` +
## `IMD living average score` + `IMD IDACI average score`, data = modeldata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.7391 -0.1841 -0.0108 0.1802 1.2335
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.862160 0.131781 29.307 < 2e-16 ***
## `Percentage of Mixed children` -3.157068 1.335894 -2.363 0.018740 *
## `Percentage of Asian children` 1.170565 0.225049 5.201 3.63e-07 ***
## `Percentage of African children` 4.488435 0.536011 8.374 2.03e-15 ***
## `IMD education average score` 0.020010 0.003727 5.368 1.57e-07 ***
## `IMD health average score` 0.171782 0.062841 2.734 0.006629 **
## `IMD crime average score` -0.037292 0.052893 -0.705 0.481324
## `IMD living average score` -0.003164 0.002132 -1.484 0.138755
## `IMD IDACI average score` 2.930233 0.813056 3.604 0.000366 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2774 on 306 degrees of freedom
## Multiple R-squared: 0.7722, Adjusted R-squared: 0.7663
## F-statistic: 129.7 on 8 and 306 DF, p-value: < 2.2e-16
#Remove living average score
model_d <-lm(root_ob ~ `Percentage of Mixed children`+ `Percentage of Asian children`+ `Percentage of African children`+ `IMD education average score`+ `IMD health average score`+ `IMD crime average score`+ `IMD IDACI average score`, data = modeldata)
summary(model_d)
##
## Call:
## lm(formula = root_ob ~ `Percentage of Mixed children` + `Percentage of Asian children` +
## `Percentage of African children` + `IMD education average score` +
## `IMD health average score` + `IMD crime average score` +
## `IMD IDACI average score`, data = modeldata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.71569 -0.18310 -0.00798 0.18331 1.16380
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.812813 0.127768 29.842 < 2e-16 ***
## `Percentage of Mixed children` -3.215825 1.337921 -2.404 0.016827 *
## `Percentage of Asian children` 1.103218 0.220859 4.995 9.89e-07 ***
## `Percentage of African children` 4.396905 0.533494 8.242 4.99e-15 ***
## `IMD education average score` 0.020879 0.003688 5.661 3.45e-08 ***
## `IMD health average score` 0.156840 0.062151 2.524 0.012122 *
## `IMD crime average score` -0.022629 0.052065 -0.435 0.664138
## `IMD IDACI average score` 2.778762 0.808206 3.438 0.000667 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.278 on 307 degrees of freedom
## Multiple R-squared: 0.7706, Adjusted R-squared: 0.7653
## F-statistic: 147.3 on 7 and 307 DF, p-value: < 2.2e-16
#Remove crime average score instead of living average score
model_e <-lm(root_ob ~ `Percentage of Mixed children`+ `Percentage of Asian children`+ `Percentage of African children`+ `IMD education average score`+ `IMD health average score`+ `IMD living average score`+ `IMD IDACI average score`, data = modeldata)
summary(model_e)
##
## Call:
## lm(formula = root_ob ~ `Percentage of Mixed children` + `Percentage of Asian children` +
## `Percentage of African children` + `IMD education average score` +
## `IMD health average score` + `IMD living average score` +
## `IMD IDACI average score`, data = modeldata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.7369 -0.1846 -0.0076 0.1777 1.2215
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.904688 0.117068 33.354 < 2e-16 ***
## `Percentage of Mixed children` -3.420394 1.281570 -2.669 0.008014 **
## `Percentage of Asian children` 1.136345 0.219573 5.175 4.12e-07 ***
## `Percentage of African children` 4.526474 0.532851 8.495 8.68e-16 ***
## `IMD education average score` 0.019997 0.003724 5.370 1.56e-07 ***
## `IMD health average score` 0.169717 0.062721 2.706 0.007193 **
## `IMD living average score` -0.002884 0.002093 -1.378 0.169209
## `IMD IDACI average score` 2.723392 0.757660 3.594 0.000379 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2772 on 307 degrees of freedom
## Multiple R-squared: 0.7718, Adjusted R-squared: 0.7666
## F-statistic: 148.4 on 7 and 307 DF, p-value: < 2.2e-16
#Remove crime average score and living average score
model_f <-lm(root_ob ~ `Percentage of Mixed children`+ `Percentage of Asian children`+ `Percentage of African children`+ `IMD education average score`+ `IMD health average score` + `IMD IDACI average score`, data = modeldata)
summary(model_f)
##
## Call:
## lm(formula = root_ob ~ `Percentage of Mixed children` + `Percentage of Asian children` +
## `Percentage of African children` + `IMD education average score` +
## `IMD health average score` + `IMD IDACI average score`, data = modeldata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.71559 -0.18097 -0.01082 0.18078 1.16015
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.842304 0.108118 35.538 < 2e-16 ***
## `Percentage of Mixed children` -3.378109 1.283071 -2.633 0.008895 **
## `Percentage of Asian children` 1.085460 0.216761 5.008 9.29e-07 ***
## `Percentage of African children` 4.425927 0.528602 8.373 2.00e-15 ***
## `IMD education average score` 0.020823 0.003681 5.657 3.52e-08 ***
## `IMD health average score` 0.156375 0.062060 2.520 0.012249 *
## `IMD IDACI average score` 2.657163 0.757236 3.509 0.000517 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2776 on 308 degrees of freedom
## Multiple R-squared: 0.7704, Adjusted R-squared: 0.766
## F-statistic: 172.3 on 6 and 308 DF, p-value: < 2.2e-16
Stepwise AIC and best subset variable selection (reference only).
# stepwise aic regression
#Run for all variables
ols_step_both_aic(model_a)
##
##
## Stepwise Summary
## ---------------------------------------------------------------------------------------------------
## Variable Method AIC RSS Sum Sq R-Sq Adj. R-Sq
## ---------------------------------------------------------------------------------------------------
## `IMD IDACI average score` addition 199.568 34.097 69.312 0.67027 0.66922
## `Less active rate` addition 155.185 27.175 59.176 0.68529 0.68292
## `Percentage of African children` addition 117.903 23.470 62.881 0.72820 0.72511
## `IMD education average score` addition 96.236 21.486 64.865 0.75117 0.74739
## `IMD health average score` addition 83.766 20.357 65.994 0.76425 0.75975
## `Percentage of Asian children` addition 74.845 19.544 66.807 0.77367 0.76846
## `Percentage of Mixed children` addition 63.230 18.576 67.775 0.78488 0.77908
## `IMD living average score` addition 61.559 18.323 68.028 0.78780 0.78125
## `Density of fast food outlets` addition 61.427 18.178 68.173 0.78948 0.78214
## ---------------------------------------------------------------------------------------------------
k <- ols_step_both_aic(model_a)
ols_step_both_aic(model_a, details = FALSE)
##
##
## Stepwise Summary
## ---------------------------------------------------------------------------------------------------
## Variable Method AIC RSS Sum Sq R-Sq Adj. R-Sq
## ---------------------------------------------------------------------------------------------------
## `IMD IDACI average score` addition 199.568 34.097 69.312 0.67027 0.66922
## `Less active rate` addition 155.185 27.175 59.176 0.68529 0.68292
## `Percentage of African children` addition 117.903 23.470 62.881 0.72820 0.72511
## `IMD education average score` addition 96.236 21.486 64.865 0.75117 0.74739
## `IMD health average score` addition 83.766 20.357 65.994 0.76425 0.75975
## `Percentage of Asian children` addition 74.845 19.544 66.807 0.77367 0.76846
## `Percentage of Mixed children` addition 63.230 18.576 67.775 0.78488 0.77908
## `IMD living average score` addition 61.559 18.323 68.028 0.78780 0.78125
## `Density of fast food outlets` addition 61.427 18.178 68.173 0.78948 0.78214
## ---------------------------------------------------------------------------------------------------
ols_step_best_subset(model_a)
## Best Subsets Regression
## -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
## Model Index Predictors
## -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
## 1 `IMD IDACI average score`
## 2 `Percentage of African children` `IMD IDACI average score`
## 3 `Percentage of African children` `IMD education average score` `IMD IDACI average score`
## 4 `Percentage of African children` `IMD education average score` `IMD health average score` `Less active rate`
## 5 `Percentage of Asian children` `Percentage of African children` `IMD education average score` `IMD health average score` `Less active rate`
## 6 `Percentage of Mixed children` `Percentage of Asian children` `Percentage of African children` `IMD education average score` `IMD IDACI average score` `Less active rate`
## 7 `Percentage of Mixed children` `Percentage of Asian children` `Percentage of African children` `IMD education average score` `IMD health average score` `IMD IDACI average score` `Less active rate`
## 8 `Percentage of Mixed children` `Percentage of Asian children` `Percentage of African children` `IMD education average score` `IMD health average score` `IMD living average score` `IMD IDACI average score` `Less active rate`
## 9 `Density of fast food outlets` `Percentage of Mixed children` `Percentage of Asian children` `Percentage of African children` `IMD education average score` `IMD health average score` `IMD living average score` `IMD IDACI average score` `Less active rate`
## 10 `Density of fast food outlets` `Percentage of Mixed children` `Percentage of Asian children` `Percentage of African children` `IMD education average score` `IMD health average score` `IMD crime average score` `IMD living average score` `IMD IDACI average score` `Less active rate`
## -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
##
## Subsets Regression Summary
## ---------------------------------------------------------------------------------------------------------------------------------
## Adj. Pred
## Model R-Square R-Square R-Square C(p) AIC SBIC SBC MSEP FPE HSP APC
## ---------------------------------------------------------------------------------------------------------------------------------
## 1 0.6703 0.6692 0.6658 172.7044 199.5678 -696.0019 210.8255 0.1096 0.1096 3e-04 0.3339
## 2 0.7096 0.7078 0.703 116.9879 161.5430 -734.0873 176.5532 0.0972 0.0972 3e-04 0.2960
## 3 0.7439 0.7414 0.7359 68.6655 123.9438 -771.3316 142.7066 0.0862 0.0862 3e-04 0.2627
## 4 0.7588 0.7552 0.7467 37.4353 87.8608 -673.6362 109.4067 0.0807 0.0807 3e-04 0.2503
## 5 0.7694 0.7650 0.7565 26.5443 77.9041 -683.2700 103.0410 0.0778 0.0777 3e-04 0.2412
## 6 0.7780 0.7729 0.763 17.9586 69.6703 -691.0849 98.3982 0.0754 0.0754 3e-04 0.2339
## 7 0.7849 0.7791 0.7677 11.5270 63.2300 -697.0501 95.5489 0.0737 0.0736 3e-04 0.2284
## 8 0.7878 0.7812 0.7696 9.9416 61.5587 -698.4363 97.4685 0.0732 0.0731 3e-04 0.2269
## 9 0.7895 0.7821 0.7694 9.8825 61.4273 -698.3421 100.9282 0.0732 0.0731 3e-04 0.2268
## 10 0.7902 0.7820 0.7682 11.0000 62.5086 -697.1045 105.6004 0.0735 0.0734 3e-04 0.2278
## ---------------------------------------------------------------------------------------------------------------------------------
## AIC: Akaike Information Criteria
## SBIC: Sawa's Bayesian Information Criteria
## SBC: Schwarz Bayesian Criteria
## MSEP: Estimated error of prediction, assuming multivariate normality
## FPE: Final Prediction Error
## HSP: Hocking's Sp
## APC: Amemiya Prediction Criteria
#Run for model f
ols_step_both_aic(model_f)
##
##
## Stepwise Summary
## ---------------------------------------------------------------------------------------------------
## Variable Method AIC RSS Sum Sq R-Sq Adj. R-Sq
## ---------------------------------------------------------------------------------------------------
## `IMD IDACI average score` addition 199.568 34.097 69.312 0.67027 0.66922
## `Percentage of African children` addition 161.543 30.028 73.380 0.70962 0.70776
## `IMD education average score` addition 123.944 26.481 76.928 0.74392 0.74145
## `Percentage of Asian children` addition 109.707 25.150 78.258 0.75679 0.75365
## `Percentage of Mixed children` addition 99.939 24.228 79.180 0.76570 0.76191
## `IMD health average score` addition 95.512 23.739 79.670 0.77044 0.76596
## ---------------------------------------------------------------------------------------------------
k <- ols_step_both_aic(model_f)
ols_step_both_aic(model_f, details = FALSE)
##
##
## Stepwise Summary
## ---------------------------------------------------------------------------------------------------
## Variable Method AIC RSS Sum Sq R-Sq Adj. R-Sq
## ---------------------------------------------------------------------------------------------------
## `IMD IDACI average score` addition 199.568 34.097 69.312 0.67027 0.66922
## `Percentage of African children` addition 161.543 30.028 73.380 0.70962 0.70776
## `IMD education average score` addition 123.944 26.481 76.928 0.74392 0.74145
## `Percentage of Asian children` addition 109.707 25.150 78.258 0.75679 0.75365
## `Percentage of Mixed children` addition 99.939 24.228 79.180 0.76570 0.76191
## `IMD health average score` addition 95.512 23.739 79.670 0.77044 0.76596
## ---------------------------------------------------------------------------------------------------
ols_step_best_subset(model_f)
## Best Subsets Regression
## ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
## Model Index Predictors
## ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
## 1 `IMD IDACI average score`
## 2 `Percentage of African children` `IMD IDACI average score`
## 3 `Percentage of African children` `IMD education average score` `IMD IDACI average score`
## 4 `Percentage of Asian children` `Percentage of African children` `IMD education average score` `IMD health average score`
## 5 `Percentage of Mixed children` `Percentage of Asian children` `Percentage of African children` `IMD education average score` `IMD IDACI average score`
## 6 `Percentage of Mixed children` `Percentage of Asian children` `Percentage of African children` `IMD education average score` `IMD health average score` `IMD IDACI average score`
## ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
##
## Subsets Regression Summary
## ---------------------------------------------------------------------------------------------------------------------------------
## Adj. Pred
## Model R-Square R-Square R-Square C(p) AIC SBIC SBC MSEP FPE HSP APC
## ---------------------------------------------------------------------------------------------------------------------------------
## 1 0.6703 0.6692 0.6658 131.3857 199.5678 -695.6811 210.8255 0.1096 0.1096 3e-04 0.3339
## 2 0.7096 0.7078 0.703 80.5994 161.5430 -733.6105 176.5532 0.0972 0.0972 3e-04 0.2960
## 3 0.7439 0.7414 0.7359 36.5756 123.9438 -770.6667 142.7066 0.0862 0.0862 3e-04 0.2627
## 4 0.7587 0.7556 0.75 18.7370 107.2089 -786.9937 129.7243 0.0818 0.0818 3e-04 0.2491
## 5 0.7657 0.7619 0.7542 11.3492 99.9394 -793.9671 126.2074 0.0799 0.0799 3e-04 0.2434
## 6 0.7704 0.7660 0.7569 7.0000 95.5119 -798.1022 125.5325 0.0788 0.0788 3e-04 0.2400
## ---------------------------------------------------------------------------------------------------------------------------------
## AIC: Akaike Information Criteria
## SBIC: Sawa's Bayesian Information Criteria
## SBC: Schwarz Bayesian Criteria
## MSEP: Estimated error of prediction, assuming multivariate normality
## FPE: Final Prediction Error
## HSP: Hocking's Sp
## APC: Amemiya Prediction Criteria
#save the residuals into dataframe
finalmodel<- lm(root_ob ~ `Percentage of Mixed children`+ `Percentage of Asian children`+ `Percentage of African children`+ `IMD education average score`+ `IMD health average score` + `IMD IDACI average score`, data = modeldata)
summary(finalmodel)
##
## Call:
## lm(formula = root_ob ~ `Percentage of Mixed children` + `Percentage of Asian children` +
## `Percentage of African children` + `IMD education average score` +
## `IMD health average score` + `IMD IDACI average score`, data = modeldata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.71559 -0.18097 -0.01082 0.18078 1.16015
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.842304 0.108118 35.538 < 2e-16 ***
## `Percentage of Mixed children` -3.378109 1.283071 -2.633 0.008895 **
## `Percentage of Asian children` 1.085460 0.216761 5.008 9.29e-07 ***
## `Percentage of African children` 4.425927 0.528602 8.373 2.00e-15 ***
## `IMD education average score` 0.020823 0.003681 5.657 3.52e-08 ***
## `IMD health average score` 0.156375 0.062060 2.520 0.012249 *
## `IMD IDACI average score` 2.657163 0.757236 3.509 0.000517 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2776 on 308 degrees of freedom
## Multiple R-squared: 0.7704, Adjusted R-squared: 0.766
## F-statistic: 172.3 on 6 and 308 DF, p-value: < 2.2e-16
#Check multicollinearity
vif(finalmodel)
## `Percentage of Mixed children` `Percentage of Asian children`
## 4.220859 1.598609
## `Percentage of African children` `IMD education average score`
## 3.631903 4.220017
## `IMD health average score` `IMD IDACI average score`
## 6.346407 8.240346
ObesityMAP$`Model residuals`<- finalmodel$residuals
Figure 3. Diagnostic plots for OLS regression model
#residual plot
par(mfrow = c(2, 2)) # Split the plotting panel into a 2 x 2 grid
plot(finalmodel) # Plot the model residuals
Cross-validation of model
###Cross-validation of model
#####Cross-Validation
set.seed(48)
regressdata<-modeldata[,c("Percentage of Mixed children","Percentage of Asian children",
"Percentage of African children","IMD education average score",
"IMD health average score","IMD IDACI average score",
"root_ob" )]
model<- train(root_ob ~ `Percentage of Mixed children`+ `Percentage of Asian children`+ `Percentage of African children`+ `IMD education average score`+ `IMD health average score` + `IMD IDACI average score`,
regressdata,
method="lm",
trControl=trainControl(method="repeatedcv",
number=10,
verboseIter=TRUE))
## + Fold01.Rep1: intercept=TRUE
## - Fold01.Rep1: intercept=TRUE
## + Fold02.Rep1: intercept=TRUE
## - Fold02.Rep1: intercept=TRUE
## + Fold03.Rep1: intercept=TRUE
## - Fold03.Rep1: intercept=TRUE
## + Fold04.Rep1: intercept=TRUE
## - Fold04.Rep1: intercept=TRUE
## + Fold05.Rep1: intercept=TRUE
## - Fold05.Rep1: intercept=TRUE
## + Fold06.Rep1: intercept=TRUE
## - Fold06.Rep1: intercept=TRUE
## + Fold07.Rep1: intercept=TRUE
## - Fold07.Rep1: intercept=TRUE
## + Fold08.Rep1: intercept=TRUE
## - Fold08.Rep1: intercept=TRUE
## + Fold09.Rep1: intercept=TRUE
## - Fold09.Rep1: intercept=TRUE
## + Fold10.Rep1: intercept=TRUE
## - Fold10.Rep1: intercept=TRUE
## Aggregating results
## Fitting final model on full training set
summary(model)
##
## Call:
## lm(formula = .outcome ~ ., data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.71559 -0.18097 -0.01082 0.18078 1.16015
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.842304 0.108118 35.538 < 2e-16
## `\\`Percentage of Mixed children\\`` -3.378109 1.283071 -2.633 0.008895
## `\\`Percentage of Asian children\\`` 1.085460 0.216761 5.008 9.29e-07
## `\\`Percentage of African children\\`` 4.425927 0.528602 8.373 2.00e-15
## `\\`IMD education average score\\`` 0.020823 0.003681 5.657 3.52e-08
## `\\`IMD health average score\\`` 0.156375 0.062060 2.520 0.012249
## `\\`IMD IDACI average score\\`` 2.657163 0.757236 3.509 0.000517
##
## (Intercept) ***
## `\\`Percentage of Mixed children\\`` **
## `\\`Percentage of Asian children\\`` ***
## `\\`Percentage of African children\\`` ***
## `\\`IMD education average score\\`` ***
## `\\`IMD health average score\\`` *
## `\\`IMD IDACI average score\\`` ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2776 on 308 degrees of freedom
## Multiple R-squared: 0.7704, Adjusted R-squared: 0.766
## F-statistic: 172.3 on 6 and 308 DF, p-value: < 2.2e-16
#print R2, MAE and RMSE
#print R2, MAE and RMSE
print(model)
## Linear Regression
##
## 315 samples
## 6 predictor
##
## No pre-processing
## Resampling: Cross-Validated (10 fold, repeated 1 times)
## Summary of sample sizes: 283, 284, 283, 284, 283, 283, ...
## Resampling results:
##
## RMSE Rsquared MAE
## 0.2795769 0.7716322 0.2217382
##
## Tuning parameter 'intercept' was held constant at a value of TRUE
#Now check for spatial auto-correlation
ObesityMAP <- as(ObesityMAP,"Spatial")
names(ObesityMAP)
## [1] "GSS_CODE"
## [2] "FID"
## [3] "LAD19NM"
## [4] "LAD19NMW"
## [5] "BNG_E"
## [6] "BNG_N"
## [7] "LONG"
## [8] "LAT"
## [9] "Shape__Area"
## [10] "Shape__Length"
## [11] "obesity_rate"
## [12] "Density.of.fast.food.outlets"
## [13] "Active.rate"
## [14] "Percentage.of.White.children"
## [15] "Percentage.of.Mixed.children"
## [16] "Percentage.of.Asian.children"
## [17] "Percentage.of.African.children"
## [18] "Income...Average.rank"
## [19] "Employment...Average.rank"
## [20] "Education..Skills.and.Training...Average.rank"
## [21] "Health.Deprivation.and.Disability...Average.rank"
## [22] "Crime...Average.rank"
## [23] "Barriers.to.Housing.and.Services...Average.rank"
## [24] "Living.Environment...Average.rank"
## [25] "IDACI...Average.rank"
## [26] "IMD.income.average.score"
## [27] "IMD.employment.average.score"
## [28] "IMD.education.average.score"
## [29] "IMD.health.average.score"
## [30] "IMD.crime.average.score"
## [31] "IMD.housing.average.score"
## [32] "IMD.living.average.score"
## [33] "IMD.IDACI.average.score"
## [34] "Fairly.active.rate"
## [35] "Less.active.rate"
## [36] "root_ob"
## [37] "Model.residuals"
#Calculate centriod of each LA.
coordsW <- coordinates(ObesityMAP)
plot(coordsW)
#Neighbours list of queens contiguity
LWard_nb <- poly2nb(ObesityMAP, queen=T)
#and nearest neighbours
knn_wards <- knearneigh(coordsW, k=4)
LWard_knn <- knn2nb(knn_wards)
#plot and add a map
plot(LWard_nb, coordinates(coordsW), col="red")
plot(LWard_knn, coordinates(coordsW), col="blue")
plot(ObesityMAP)
#create a spatial weights matrix
Lward.queens_weight <- nb2listw(LWard_nb, style="C",zero.policy=TRUE)
Lward.knn_4_weight <- nb2listw(LWard_knn, style="C",zero.policy=TRUE)
#Run Moran’s I using the residuals from our final model #first using queens neighbours
moran.test(ObesityMAP@data$`Model.residuals`, Lward.queens_weight,zero.policy=TRUE)
##
## Moran I test under randomisation
##
## data: ObesityMAP@data$Model.residuals
## weights: Lward.queens_weight n reduced by no-neighbour observations
##
##
## Moran I statistic standard deviate = 7.9544, p-value = 9e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.271642496 -0.003194888 0.001193815
#Then knn = 4
moran.test(ObesityMAP@data$`Model.residuals`, Lward.knn_4_weight,zero.policy=TRUE)
##
## Moran I test under randomisation
##
## data: ObesityMAP@data$Model.residuals
## weights: Lward.knn_4_weight
##
## Moran I statistic standard deviate = 7.8037, p-value = 3.005e-15
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.285285412 -0.003184713 0.001366461
##GWR(Andy MacLachlan and Adam Dennett,2019)
#calculate kernel bandwidth
GWRbandwidth <- gwr.sel(root_ob ~ `Percentage of Mixed children`+ `Percentage of Asian children`+ `Percentage of African children`+ `IMD education average score`+ `IMD health average score` + `IMD IDACI average score`,data = modeldata,coords=coordsW,adapt=T)
## Adaptive q: 0.381966 CV score: 22.7907
## Adaptive q: 0.618034 CV score: 23.71091
## Adaptive q: 0.236068 CV score: 22.32975
## Adaptive q: 0.145898 CV score: 22.26535
## Adaptive q: 0.1565004 CV score: 22.27587
## Adaptive q: 0.09016994 CV score: 21.82901
## Adaptive q: 0.05572809 CV score: 21.04139
## Adaptive q: 0.03444185 CV score: 20.49357
## Adaptive q: 0.02128624 CV score: 22.45724
## Adaptive q: 0.04257247 CV score: 20.67178
## Adaptive q: 0.02941686 CV score: 20.76158
## Adaptive q: 0.03659131 CV score: 20.55597
## Adaptive q: 0.03252248 CV score: 20.55079
## Adaptive q: 0.03451283 CV score: 20.49199
## Adaptive q: 0.03530674 CV score: 20.49814
## Adaptive q: 0.03479792 CV score: 20.48601
## Adaptive q: 0.03499227 CV score: 20.4862
## Adaptive q: 0.03488435 CV score: 20.48431
## Adaptive q: 0.03492504 CV score: 20.48376
## Adaptive q: 0.03492504 CV score: 20.48376
#run the gwr model
gwr.model = gwr(root_ob ~ `Percentage of Mixed children`+ `Percentage of Asian children`+ `Percentage of African children`+ `IMD education average score`+ `IMD health average score` + `IMD IDACI average score`,data = modeldata,coords=coordsW, adapt=GWRbandwidth, hatmatrix=TRUE, se.fit=TRUE)
#print the results of the model
gwr.model
## Call:
## gwr(formula = root_ob ~ `Percentage of Mixed children` + `Percentage of Asian children` +
## `Percentage of African children` + `IMD education average score` +
## `IMD health average score` + `IMD IDACI average score`, data = modeldata,
## coords = coordsW, adapt = GWRbandwidth, hatmatrix = TRUE,
## se.fit = TRUE)
## Kernel function: gwr.Gauss
## Adaptive quantile: 0.03492504 (about 11 of 315 data points)
## Summary of GWR coefficient estimates at data points:
## Min. 1st Qu. Median
## X.Intercept. 2.2379325 2.9897034 3.4863385
## X.Percentage.of.Mixed.children. -22.5861202 -3.5869974 -0.8864863
## X.Percentage.of.Asian.children. -2.5524665 0.2057791 1.1725359
## X.Percentage.of.African.children. -8.5687184 2.1280087 3.3279073
## X.IMD.education.average.score. -0.0072453 0.0185664 0.0262770
## X.IMD.health.average.score. -0.6729346 -0.1458490 -0.0352223
## X.IMD.IDACI.average.score. -1.1677115 2.9253575 4.3309386
## 3rd Qu. Max. Global
## X.Intercept. 3.9000246 4.5576890 3.8423
## X.Percentage.of.Mixed.children. 1.4186154 14.1164053 -3.3781
## X.Percentage.of.Asian.children. 1.5801901 2.7330463 1.0855
## X.Percentage.of.African.children. 4.1428690 10.3270702 4.4259
## X.IMD.education.average.score. 0.0332300 0.0485405 0.0208
## X.IMD.health.average.score. 0.0863856 0.3934210 0.1564
## X.IMD.IDACI.average.score. 5.3997008 9.6079104 2.6572
## Number of data points: 315
## Effective number of parameters (residual: 2traceS - traceS'S): 92.88358
## Effective degrees of freedom (residual: 2traceS - traceS'S): 222.1164
## Sigma (residual: 2traceS - traceS'S): 0.2308206
## Effective number of parameters (model: traceS): 68.62676
## Effective degrees of freedom (model: traceS): 246.3732
## Sigma (model: traceS): 0.2191634
## Sigma (ML): 0.1938249
## AICc (GWR p. 61, eq 2.33; p. 96, eq. 4.21): 39.72662
## AIC (GWR p. 96, eq. 4.22): -71.14606
## Residual sum of squares: 11.83395
## Quasi-global R2: 0.8855612
#Attach coefficients to original dataframe
results<-as.data.frame(gwr.model$SDF)
names(results)
## [1] "sum.w"
## [2] "X.Intercept."
## [3] "X.Percentage.of.Mixed.children."
## [4] "X.Percentage.of.Asian.children."
## [5] "X.Percentage.of.African.children."
## [6] "X.IMD.education.average.score."
## [7] "X.IMD.health.average.score."
## [8] "X.IMD.IDACI.average.score."
## [9] "X.Intercept._se"
## [10] "X.Percentage.of.Mixed.children._se"
## [11] "X.Percentage.of.Asian.children._se"
## [12] "X.Percentage.of.African.children._se"
## [13] "X.IMD.education.average.score._se"
## [14] "X.IMD.health.average.score._se"
## [15] "X.IMD.IDACI.average.score._se"
## [16] "gwr.e"
## [17] "pred"
## [18] "pred.se"
## [19] "localR2"
## [20] "X.Intercept._se_EDF"
## [21] "X.Percentage.of.Mixed.children._se_EDF"
## [22] "X.Percentage.of.Asian.children._se_EDF"
## [23] "X.Percentage.of.African.children._se_EDF"
## [24] "X.IMD.education.average.score._se_EDF"
## [25] "X.IMD.health.average.score._se_EDF"
## [26] "X.IMD.IDACI.average.score._se_EDF"
## [27] "pred.se.1"
## [28] "coord.x"
## [29] "coord.y"
ObesityMAP@data$coefr2<-results$localR2
ObesityMAP@data$coefmixed<-results$X.Percentage.of.Mixed.children.
ObesityMAP@data$coefafrican<-results$X.Percentage.of.African.children.
ObesityMAP@data$coefedu<-results$X.IMD.education.average.score.
ObesityMAP@data$coefhealth<-results$X.IMD.health.average.score.
ObesityMAP@data$coefasian<-results$X.Percentage.of.Asian.children.
ObesityMAP@data$coefidaci<-results$X.IMD.IDACI.average.score.
#Local R-squared
tmap_mode("view")
## tmap mode set to interactive viewing
tm_shape(ObesityMAP) +
tm_polygons(col = "coefr2", title = "Local R-squared", breaks = c(-Inf,0.7,0.75,0.8,0.85,0.9,0.95, Inf), palette = "YlGnBu", alpha = 1)+
tm_format_World(title = NA,legend.width = 0.8, scale = 1,
legend.position = c("left", "top"))+tm_scale_bar()+tm_compass()
## Compass not supported in view mode.
## legend.postion is used for plot mode. Use view.legend.position in tm_view to set the legend position in view mode.
tmap_mode("view")
## tmap mode set to interactive viewing
tm_shape(ObesityMAP) +
tm_polygons(col = "coefafrican", title = "Coefficient for percentage of African children", palette = "RdBu", alpha = 1)+
tm_format_World(title = NA,legend.width = 0.8, scale = 1,
legend.position = c("left", "top"))+tm_scale_bar()+tm_compass()
## Compass not supported in view mode.
## legend.postion is used for plot mode. Use view.legend.position in tm_view to set the legend position in view mode.
## Variable "coefafrican" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
#Percentage of mixed children
tm_shape(ObesityMAP) +
tm_polygons(col = "coefmixed", title = "Coefficient for percentage of Mixed children",breaks = c(-Inf,-20,-10,-5,0,5,10,15, Inf), palette = "RdBu", alpha = 1)+
tm_format_World(title = NA,legend.width = 0.8, scale = 1,
legend.position = c("left", "top"))+tm_scale_bar()+tm_compass()
## Compass not supported in view mode.
## legend.postion is used for plot mode. Use view.legend.position in tm_view to set the legend position in view mode.
## Variable "coefmixed" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
#Percentage of Asian children
tm_shape(ObesityMAP) +
tm_polygons(col = "coefasian", title = "Coefficient for percentage of Asian children", palette = "RdBu", alpha = 1)+
tm_format_World(title = NA,legend.width = 0.8, scale = 1,
legend.position = c("left", "top"))+tm_scale_bar()+tm_compass()
## Compass not supported in view mode.
## legend.postion is used for plot mode. Use view.legend.position in tm_view to set the legend position in view mode.
## Variable "coefasian" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
#IDACI score
tm_shape(ObesityMAP) +
tm_polygons(col = "coefidaci", title = "Coefficient for IMD IDACI average acore", breaks = c(-Inf,-2,0,2,4,6,8, Inf), palette = "RdBu", alpha = 1)+
tm_format_World(title = NA,legend.width = 0.8, scale = 1,
legend.position = c("left", "top"))+tm_scale_bar()+tm_compass()
## Compass not supported in view mode.
## legend.postion is used for plot mode. Use view.legend.position in tm_view to set the legend position in view mode.
## Variable "coefidaci" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
#IMD Health score
tm_shape(ObesityMAP) +
tm_polygons(col = "coefhealth", title = "Coefficient for IMD Health average acore", palette = "RdBu", alpha = 1)+
tm_format_World(title = NA,legend.width = 0.8, scale = 1,
legend.position = c("left", "top"))+tm_scale_bar()+tm_compass()
## Compass not supported in view mode.
## legend.postion is used for plot mode. Use view.legend.position in tm_view to set the legend position in view mode.
## Variable "coefhealth" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
#IMD education
tm_shape(ObesityMAP) +
tm_polygons(col = "coefedu", title = "Coefficient for IMD Education average acore", palette = "RdBu", alpha = 1)+
tm_format_World(title = NA,legend.width = 0.8, scale = 1,
legend.position = c("left", "top"))+tm_scale_bar()+tm_compass()
## Compass not supported in view mode.
## legend.postion is used for plot mode. Use view.legend.position in tm_view to set the legend position in view mode.
## Variable "coefedu" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
#render("Report code.Rmd", output_dir = output_dir, params = list(output_dir = output_dir))
modeldata2<- childdata2[,c("GSS_CODE","root_ob","Percentage of Mixed children","Percentage of Asian children","Percentage of African children",
"IMD education average score","IMD health average score","IMD IDACI average score")]
Cluster analysis(Guy Lansley and James Cheshire,2018)
clusterdata<- childdata2[,c("root_ob","Percentage of Mixed children","Percentage of Asian children","Percentage of African children",
"IMD education average score","IMD health average score","IMD IDACI average score")]
value <- colnames(clusterdata)
# creates a new data frame
stand_data <- clusterdata
for(i in 1: ncol (clusterdata)){
stand_data[, value[i]] <- scale(as.numeric(modeldata2[, value[i]]))
}
#K-means clustering (k=5)
Km <- kmeans(stand_data, 5, nstart = 25, iter.max = 1000)
KmClusters <- as.matrix(Km$cluster)
KmClusters <- as.data.frame(KmClusters)
KmCenters <- as.matrix(Km$centers)
KmCenters <- as.data.frame(KmCenters)
#Number of LAs in each cluster
table(KmClusters)
## KmClusters
## 1 2 3 4 5
## 80 109 80 21 25
#Validate choice of k
# Total within-cluster sum of square
wss <- NULL
for (i in 1:15) wss[i] <- kmeans(stand_data,centers = i,iter.max = 1000)$tot.withinss
plot(1:15, wss, type = "b", pch = 19, xlab = "Number of Clusters",
ylab = "Total within-cluster sum of squares")
#Cluster plot of the first 2 principal components
library(cluster)
clusplot(stand_data,Km$cluster, color = TRUE, shade = FALSE,
labels = 4, lines = 0, plotchar = FALSE)
#More cluster plot
library(ggplot2)
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
fviz_cluster(Km, data = stand_data, geom = "point", ellipse = F, pointsize = 0.5,
ggtheme = theme_classic())
#Radial plot for each group
library(plotrix)
KmCenters[5,]<- c(0)
par(cex.axis = 0.8, cex.lab = 0.8)
radial.plot(KmCenters[c(1,5),], labels = colnames(KmCenters),
boxed.radial = FALSE, show.radial.grid = TRUE,
line.col = c("blue", "red"), radlab = TRUE,
rp.type = "p", show.grid.labels = 3)
##Creates a map of cluster groups
#Join the cluster labels to the GSS codes
GSScode<- childdata2[,c(1,2)]
Classification <- as.data.frame(cbind(as.character(GSScode[,1]), KmClusters[,1]))
names(Classification) <- c("GSS_CODE", "Classification")
OA.Class<- merge(ObesityMAP, Classification, by.x = "GSS_CODE", by.y = "GSS_CODE",duplicateGeoms = TRUE)
# creates a map in R
tm_shape(OA.Class) +
tm_polygons(col = "Classification", title = "Cluster groups", palette = "RdBu", alpha = 1)+
tm_format_World(title = NA,legend.width = 0.8, scale = 1,
legend.position = c("left", "top"))+tm_scale_bar()+tm_compass()
## Compass not supported in view mode.
## legend.postion is used for plot mode. Use view.legend.position in tm_view to set the legend position in view mode.
Reference: Andy MacLachlan and Adam Dennett, 2019. CASA0005 Geographic Information Systems and Science. Available from: https://andrewmaclachlan.github.io/CASA0005repo/index.html
Guy Lansley and James Cheshire,2018.Creating a Geodemographic Classification Using K-means Clustering in R.Available from: https://data.cdrc.ac.uk/tutorial/creating-a-geodemographic-classification-using-k-means-clustering-in-r